In [19]:
# Question 1 Local Machine
import torch

if torch.cuda.is_available():
    print("GPU is available.")
    print("GPU device name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())
else:
    print("GPU is not available; using CPU.")
GPU is not available; using CPU.

Question 1 result from google collab¶

import torch

if torch.cuda.is_available():
    print("GPU is available.")
    print("GPU device name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())
else:
    print("GPU is not available; using CPU.")
    

GPU is available. GPU device name: NVIDIA A100-SXM4-80GB Number of GPUs: 1

IMG_7968.JPG IMG_7967.JPG

In [51]:
# Question 2, Use Calculus and PyTorch to Derive the Gradient Descent


import torch
import math
import sympy as sp


def loss(x, y):
    try:
        return x / (y + x**2) + torch.exp(-y * x)
    except (ZeroDivisionError, RuntimeError):
        return torch.tensor(float('inf'))

def dloss_dx(x, y):
    try:
        return (y + x**2 - 2*x**2) / (y + x**2)**2 - y * torch.exp(-y * x)
    except ZeroDivisionError:
        return float('inf')

def dloss_dy(x, y):
    try:
        return -x / (y + x**2)**2 + x * torch.exp(-y * x)
    except ZeroDivisionError:
        return float('inf')

if __name__ == "__main__":

    print("\nUsing PyTorch autograd:")

    x1 = torch.tensor(0.0, requires_grad=True)
    y1 = torch.tensor(1.0, requires_grad=True)

    x2 = torch.tensor(1.0, requires_grad=True)
    y2 = torch.tensor(-1.0, requires_grad=True)

    l1 = loss(x1, y1)
    l2 = loss(x2, y2)
    l1.backward()
    l2.backward()

    print("Grad at (0,1):")
    print("dl/dx =", x1.grad.item())
    print("dl/dy =", y1.grad.item())
    print("\nGrad at (1,-1):")
    print("dl/dx =", x2.grad.item())
    print("dl/dy =", y2.grad.item())
Using PyTorch autograd:
Grad at (0,1):
dl/dx = 0.0
dl/dy = -0.0

Grad at (1,-1):
dl/dx = nan
dl/dy = -inf
In [ ]:
# Question 3

%matplotlib inline
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
torch.set_printoptions(edgeitems=2, linewidth=75)


t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0,
                    8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_f = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
                    33.9, 21.8, 48.4, 60.4, 68.4])
t_fn = 0.1 * t_f 

add_size = 100 - t_c.size(0)


t_c_added = torch.linspace(-25, 45, steps=add_size)
t_f_added = (9.0 / 5.0) * t_c_added + 32.0

t_c = torch.cat((t_c, t_c_added), 0)
t_f = torch.cat((t_f, t_f_added), 0)

def add_gaussian_noise(t, mean=0.0, std=3.0):
    noise = torch.randn(t.size()) * std + mean
    return t + noise
t_f_noisy = add_gaussian_noise(t_f)
t_c_noisy = add_gaussian_noise(t_c)


t_f_noisy = t_f_noisy.numpy()
t_c_noisy = t_c_noisy.numpy()

t_f_train, t_f_val, t_c_train, t_c_val = train_test_split(
    t_f_noisy, t_c_noisy, test_size=0.7, random_state=42)

# back to tensors after splitting

t_f_train = torch.tensor(t_f_train, dtype=torch.float32)
t_c_train = torch.tensor(t_c_train, dtype=torch.float32)
t_f_val = torch.tensor(t_f_val, dtype=torch.float32)
t_c_val = torch.tensor(t_c_val, dtype=torch.float32)

def model(t_c, w, b): 
    return w * t_c + b

def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()


w = torch.tensor(1.0, device='cpu')
b = torch.tensor(0.0, device='cpu')

train_dataset = TensorDataset(t_f_train, t_c_train,)
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)

val_dataset = TensorDataset(t_f_val, t_c_val)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False)


def train_step(w,b, lr=0.1, epochs=3000):
    w.requires_grad_(True)
    b.requires_grad_(True)

    optim = torch.optim.Adam([w, b], lr=lr)
    for epoch in range(epochs):
        total_loss = 0
        for t_f_batch, t_c_batch in train_loader:
            t_f_batch = t_f_batch.to(device='cpu')
            t_c_batch = t_c_batch.to(device='cpu')


            t_p = model(t_c_batch, w, b) # predicted farenhite temperature
            loss = loss_fn(t_p, t_f_batch) # compute the loss for this batch

            optim.zero_grad() 
            loss.backward() # backwards pass to compute gradients
            optim.step() # update weights and bias
            
            total_loss += loss.item() # accumulate loss over batches
        if (epoch + 1) % 10 == 0: 
            mean_loss = total_loss / len(train_loader)
            print(f"Epoch {epoch+1}/{epochs}: w={w.item():.4f}, b={b.item():.4f}, Loss: {mean_loss:.4f}")
    return w, b, mean_loss

w, b, train_loss = train_step(w, b)

# Evaluate on validation set using our discovered parameters w and b, we will not update w and b during this evaluation, we just want to see how well our model performs on the validation set after training.
def evaluate(w, b):
    total_val_loss = 0
    with torch.no_grad():
        for t_f, t_c in val_loader:
            t_f = t_f.to(device='cpu')
            t_c = t_c.to(device='cpu')
            t_p = model(t_c, w, b)
            loss = loss_fn(t_p, t_f)
    return total_val_loss / len(val_loader)

val_loss = evaluate(w, b)
print(f"\nValidation Loss: {val_loss:.4f}")



print(f"\nFinal parameters: w={w.item():.4f}, b={b.item():.4f}, train loss={train_loss:.4f}, val loss={val_loss:.4f}")

import matplotlib.pyplot as plt
t_c_range = t_c_noisy
# predicted farenhite temperature for the range of celsius values, this incluses both train and validation data since we already know what our weight and bias are after training.
t_f_pred = model(torch.tensor(t_c_range, dtype=torch.float32), w, b) 

plt.scatter(t_c_train, t_f_train, label='Noisy Data', color='blue', s=10) # view training data points
plt.plot(t_c_range, t_f_pred.detach().numpy(), label='Predicted Line', color='red') # view the predicted line based on the trained model

plt.xlabel('Celsius')
plt.ylabel('Fahrenheit')
plt.legend()
plt.show()
Epoch 10/3000: w=2.6761, b=2.6801, Loss: 678.6278
Epoch 20/3000: w=2.3078, b=4.9387, Loss: 557.1894
Epoch 30/3000: w=2.3227, b=7.3752, Loss: 465.7780
Epoch 40/3000: w=2.2548, b=9.7202, Loss: 389.3216
Epoch 50/3000: w=2.2028, b=11.9757, Loss: 321.3073
Epoch 60/3000: w=2.1599, b=14.1165, Loss: 264.1721
Epoch 70/3000: w=2.1076, b=16.1283, Loss: 215.4898
Epoch 80/3000: w=2.0472, b=18.0132, Loss: 175.2204
Epoch 90/3000: w=2.0095, b=19.7648, Loss: 141.2093
Epoch 100/3000: w=1.9732, b=21.3766, Loss: 114.1168
Epoch 110/3000: w=1.9414, b=22.8558, Loss: 92.9367
Epoch 120/3000: w=1.9071, b=24.1936, Loss: 75.4757
Epoch 130/3000: w=1.8682, b=25.4030, Loss: 62.3735
Epoch 140/3000: w=1.8483, b=26.4875, Loss: 52.1964
Epoch 150/3000: w=1.8303, b=27.4528, Loss: 44.2304
... (output truncated for brevity) ...
Epoch 2950/3000: w=1.6950, b=32.9371, Loss: 23.7313
Epoch 2960/3000: w=1.6919, b=32.9108, Loss: 24.0696
Epoch 2970/3000: w=1.7011, b=32.9002, Loss: 24.2035
Epoch 2980/3000: w=1.7075, b=32.9033, Loss: 24.3564
Epoch 2990/3000: w=1.6514, b=32.9009, Loss: 25.5763
Epoch 3000/3000: w=1.6717, b=32.9500, Loss: 24.2767

Validation Loss: 0.0000

Final parameters: w=1.6717, b=32.9500, train loss=24.2767, val loss=0.0000
No description has been provided for this image